In [64]:
import re
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime
from datetime import date, timedelta,datetime
import scipy
import os
import math
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
In [88]:
def calc_score(self):
    """
    Lower bound of the Wilson score interval for the share of upvotes.

    Invented in 1927 by Edwin B. Wilson
    (http://www.evanmiller.org/how-not-to-sort-by-average-rating.html)
    ****
    Deprecated, as this calculation is moved into SQL
    ****

    NOTE: a second ``calc_score`` taking plain numbers is defined later in
    this notebook and replaces this name once that cell has run.

    Args:
        self: object exposing ``allvotes()`` (total number of votes) and
            ``upvotes_count()`` (number of positive votes).

    Returns:
        0 when ``allvotes()`` is 0, otherwise the Wilson lower bound computed
        with z = 1.96 (the normal quantile for a 95% two-sided interval).
    """
    n = self.allvotes()
    if n == 0:
        # No votes: the ratio is undefined, so score it as 0.
        return 0
    pos = self.upvotes_count()
    z = 1.96
    p = 1.0 * pos / n  # observed fraction of positive votes
    # Use math.sqrt: a bare `sqrt` is never imported, so the original
    # expression raised NameError for every non-zero vote count.
    score = (
        p + z * z / (2 * n)
        - z * math.sqrt((p * (1 - p) + z * z / (4 * n)) / n)
    ) / (1 + z * z / n)
    return score
In [105]:
def calc_score(num_sents, positive, z=1.96):
    """
    Lower bound of the Wilson score interval for a positive fraction.

    Invented in 1927 by Edwin B. Wilson
    (http://www.evanmiller.org/how-not-to-sort-by-average-rating.html)
    ****
    Deprecated, as this calculation is moved into SQL
    ****

    Args:
        num_sents: total number of observations (the denominator).
        positive: number of positive observations; assumed to satisfy
            0 <= positive <= num_sents (not validated here).
        z: normal quantile for the desired confidence level. Defaults to
            1.96, the value that was previously hard-coded (95% two-sided).

    Returns:
        0 when ``num_sents`` is 0, otherwise the Wilson lower bound as a float.
    """
    n = num_sents
    if n == 0:
        # Nothing observed: the ratio is undefined, so score it as 0.
        return 0
    p = 1.0 * positive / n  # observed positive fraction
    z_sq = z * z
    # Use math.sqrt: a bare `sqrt` is never imported, so the original
    # expression raised NameError for every non-zero `num_sents`.
    spread = z * math.sqrt((p * (1 - p) + z_sq / (4 * n)) / n)
    return (p + z_sq / (2 * n) - spread) / (1 + z_sq / n)
In [106]:
# Quick sanity check of the Wilson lower bound: 3 positives out of 10.
calc_score(num_sents=10, positive=3)
In [94]:
# Load the per-paragraph results table from the output directory
# (path is relative to the notebook's working directory).
# The cells below index it with 'paragraph', ' polarity', ' count',
# ' negative' and ' positive' -- note the leading space in all but the first.
sent_results = pd.read_csv(
    '../../output/data.csv'
)
In [82]:
# Raw polarity plotted against paragraph.
paragraph_axis = sent_results['paragraph']
polarity_series = sent_results[' polarity']  # column key has a leading space
plt.plot(paragraph_axis, polarity_series)
plt.xlabel('paragraph')
plt.ylabel('polarity')
Out[82]:
In [83]:
# Polarity divided by the ' count' column, plotted against paragraph.
paragraph_axis = sent_results['paragraph']
polarity_per_count = sent_results[' polarity'] / sent_results[' count']
plt.plot(paragraph_axis, polarity_per_count)
plt.xlabel('paragraph')
plt.ylabel('polarity (normalised)')
Out[83]:
In [86]:
# Negative and positive columns plotted against paragraph on shared axes.
plt.plot(sent_results['paragraph'], sent_results[' negative'], label='negative')
plt.plot(sent_results['paragraph'], sent_results[' positive'], label='positive')
plt.legend()
plt.xlabel('paragraph')
# The figure previously had no y-axis label; name it after the plotted columns.
plt.ylabel('negative / positive')
Out[86]:
In [85]:
# Negative and positive columns, each divided by ' count', against paragraph.
plt.plot(sent_results['paragraph'], sent_results[' negative'] / sent_results[' count'], label='negative')
plt.plot(sent_results['paragraph'], sent_results[' positive'] / sent_results[' count'], label='positive')
plt.legend()
plt.xlabel('paragraph')
# The figure previously had no y-axis label; follow the '(normalised)' wording
# used for the normalised polarity plot.
plt.ylabel('negative / positive (normalised)')
Out[85]:
In [87]:
# Preview only the first rows instead of rendering the whole frame.
sent_results.head()
Out[87]:
In [ ]: